# AMAR ET AL. - COUNTERING MISINFORMATION EARLY (2025)
## REPLICATION FILE: 06_attrition_endline.R
### This script models attrition in the main endline.
# ----
# Attrition predictors ----
attrition_predictors <- list(
  list(iv = "treatment", label = "Treatment", type = "Condition"),
  list(iv = "gender", label = "Gender", type = "Demographic"),
  list(iv = "class", label = "Grade", type = "Demographic"),
  list(iv = "age", label = "Age", type = "Demographic"),
  list(iv = "religion_hindu_num", label = "Religion - Hindu", type = "Demographic"),
  list(iv = "language_hindu_num", label = "Language - Hindi", type = "Demographic"),
  list(iv = "asset_index", label = "Asset Index", type = "Demographic"),
  list(iv = "fathers_education", label = "Father's Education", type = "Education"),
  list(iv = "mothers_education", label = "Mother's Education", type = "Education"),
  list(iv = "school_gov_num", label = "Government School", type = "Education"),
  list(iv = "science", label = "Science Knowledge", type = "Education"),
  list(iv = "mobile_internet_num", label = "Mobile Internet", type = "Demographic"),
  list(iv = "trust_newspapers_num", label = "Newspapers", type = "Trust"),
  list(iv = "trust_social_media_num", label = "Social Media", type = "Trust"),
  list(iv = "trust_tv_num", label = "TV", type = "Trust"),
  list(iv = "trust_friends_family_num", label = "Friends and Family", type = "Trust"),
  list(iv = "vaccinated_num", label = "Vaccinated", type = "Trust"),
  list(iv = "ayurveda_effective_num", label = "Ayurveda", type = "Trust"))

attrition_predictors <- rbindlist(attrition_predictors, fill = TRUE, use.names = TRUE)
attrition_predictors <- data.frame(attrition_predictors)

# function to tabulate regressions for each dv, with baseline controls
tab_regression_attrition <- function(dv, iv, design) {
  fml <- as.formula(paste(dv, "~", iv, "+ library_spillover_pre"))  
  mod <- svyglm(fml, design = design)
  out <- tidy(mod) %>%
    mutate(
      dv = dv, 
      iv = iv,
      n = nobs(mod),
      .before = term
    )
  out
}


regressions_attrition <- lapply(attrition_predictors$iv, function(iv) {
  tab_regression_attrition("attrition", iv, bimli_svy_dkr.rm)
  }) %>%
  bind_rows() %>%
  mutate(
    margin = qnorm(0.975) * std.error,
    lower = estimate - margin,
    upper = estimate + margin
  ) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term))

regressions_attrition <- left_join(attrition_predictors, regressions_attrition, by = "iv") %>%
  rowwise() %>%
  mutate(
    label_rec = case_when(grepl(iv, term) & nchar(term) > nchar(iv) ~ paste(label, "-", sub(iv, "", term)),
                     TRUE ~ label)) %>%
  ungroup()

## Plot ----
regressions_attrition %>%
  mutate(significant = if_else(p.value <= 0.05, "Yes", "No")) %>%
  ggplot(aes(x = estimate, y = fct_rev(label_rec), color = significant)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point() +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0) +
  facet_grid(rows = "type", scales = "free_y", space = "free_y", switch = "y") +
  xlab("Predictors of attrition (1 = endline not completed)") +
  scale_y_discrete(position = "right") +
  scale_color_manual(values = c("black", "red")) +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "none")
ggsave("output/figures/attrition_predictors.pdf", height = 5, width = 8)

## Table ----
regressions_attrition %>%
  mutate(
    p.value_bh = p.adjust(p.value, method = 'BH', n = length(p.value)),
  ) %>%
  #filter(idx == type & secondary_outcome == FALSE) %>% # filter to variables that are included in the index
  select(label_rec, n, estimate, std.error, p.value, p.value_bh) %>%
  mutate(n = format(n, big.mark = ","),
         sig.stars = case_when(p.value < 0.001 ~ "***",
                               p.value < 0.01 ~ "**",
                               p.value < 0.05 ~ "*",
                               TRUE ~ ""),
         estimate = str_c(format(round(estimate*100, 2), nsmall = 2), sig.stars), # add significance stars to estimate
         std.error = round(std.error, 3),
         p.value = case_when(p.value > 0.9 ~ "$>$0.9",
                             p.value < 0.001 ~ "$<$0.001",
                             p.value < 0.01 ~ as.character(round(p.value, 3)),
                             TRUE ~ as.character(round(p.value, 2))),
         p.value_bh = case_when(p.value_bh > 0.9 ~ "$>$0.9",
                             p.value_bh < 0.001 ~ "$<$0.001",
                             p.value_bh < 0.01 ~ as.character(round(p.value_bh, 3)),
                             TRUE ~ as.character(round(p.value_bh, 2)))) %>%
  select(-sig.stars) %>%
  rename(Predictor = label_rec,
         N = n,
         `Estimate*100` = estimate,
         SE = std.error,
         "p-value" = p.value,
         "p-value (FDR)" = p.value_bh,) %>%
  xtable::xtable(caption = "Attrition predictors", align = "lllllll", digits = 3,
                 label = "tab:attrition") %>%
  print(include.rownames = FALSE, 
        caption.placement = "top",
        sanitize.text.function = identity, # add backslashes to special characters
        sanitize.colnames.function = \(x) str_c("\\textbf{", x, "}"),
        add.to.row = list(pos = list(nrow(.)), 
                          command = paste(
                              "\\hline\n\\multicolumn{6}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include library-spillover strata FEs.} \\\\\n")),
        hline.after = c(-1,0), # add horizontal lane after second to last column
        comment = FALSE,
        table.placement = getOption("xtable.table.placement", "h!"),
        file = paste0("output/tables/tab_attrition", ".tex"))

# Attrition predictors by condition ----
# function to tabulate regressions for each dv, with baseline controls
tab_regression_attrition_int <- function(dv, iv, design) {
  fml <- as.formula(paste(dv, "~", iv, "+", "treatment+", iv, "* treatment", "+ library_spillover_pre"))  
  mod <- svyglm(fml, design = design)
  out <- tidy(mod) %>%
    mutate(
      dv = dv, 
      iv = iv,
      n = nobs(mod),
      .before = term
    )
  out
}

attrition_predictors_int <- attrition_predictors %>%
  filter(iv != "treatment")

regressions_attrition_int <- lapply(attrition_predictors_int$iv, function(iv) {
  tab_regression_attrition_int("attrition", iv, bimli_svy_dkr.rm)
  }) %>%
  bind_rows() %>%
  mutate(
    margin = qnorm(0.975) * std.error,
    lower = estimate - margin,
    upper = estimate + margin
  ) %>%
  filter(term != "(Intercept)" & !grepl("library_spillover_pre", term) &
           grepl(":treatment", term))

regressions_attrition_int <- left_join(attrition_predictors_int, regressions_attrition_int, by = "iv") %>%
  rowwise() %>%
  mutate(
    label_rec = case_when(grepl(iv, term) & nchar(term) > nchar(iv) + nchar(":treatmentMedia Literacy") ~ paste(label, "-", sub(iv, "", term)),
                     TRUE ~ label),
    label_rec = sub(":treatmentMedia Literacy", "",label_rec),
    label_rec = paste0(label_rec, " * T")) %>%
  ungroup()

## Plot ----
regressions_attrition_int %>%
  mutate(significant = if_else(p.value <= 0.05, "Yes", "No")) %>%
  ggplot(aes(x = estimate, y = fct_rev(label_rec), color = significant)) +
  geom_vline(xintercept = 0, lty = 2) +
  geom_point() +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0) +
  facet_grid(rows = "type", scales = "free_y", space = "free_y", switch = "y") +
  xlab("Interaction effect of covariates and treatment on attrition") +
  scale_y_discrete(position = "right") +
  scale_color_manual(values = c("black", "red")) +
  theme_bw() %+replace%
  theme(axis.title.y = element_blank(),
        strip.text.y.left = element_text(angle = 0),
        legend.position = "none")
ggsave("output/figures/attrition_predictors_interaction.pdf", height = 5, width = 8)


## Table ----
regressions_attrition_int %>%
  mutate(
    p.value_bh = p.adjust(p.value, method = 'BH', n = length(p.value)),
  ) %>%
  select(label_rec, n, estimate, std.error, p.value, p.value_bh) %>%
  mutate(n = format(n, big.mark = ","),
         sig.stars = case_when(p.value < 0.001 ~ "***",
                               p.value < 0.01 ~ "**",
                               p.value < 0.05 ~ "*",
                               TRUE ~ ""),
         estimate = str_c(format(round(estimate*100, 2), nsmall = 2), sig.stars), # add significance stars to estimate
         std.error = round(std.error, 3),
         p.value = case_when(p.value > 0.9 ~ "$>$0.9",
                             p.value < 0.001 ~ "$<$0.001",
                             p.value < 0.01 ~ as.character(round(p.value, 3)),
                             TRUE ~ as.character(round(p.value, 2))),
         p.value_bh = case_when(p.value_bh > 0.9 ~ "$>$0.9",
                                p.value_bh < 0.001 ~ "$<$0.001",
                                p.value_bh < 0.01 ~ as.character(round(p.value_bh, 3)),
                                TRUE ~ as.character(round(p.value_bh, 2)))) %>%
  select(-sig.stars) %>%
  rename(Predictor = label_rec,
         N = n,
         `Estimate*100` = estimate,
         SE = std.error,
         "p-value" = p.value,
         "p-value (FDR)" = p.value_bh) %>%
  xtable::xtable(caption = "Attrition predictors (*treatment)", align = "lllllll", digits = 3,
                 label = "tab:attrition_int") %>%
  print(include.rownames = FALSE, 
        caption.placement = "top",
        sanitize.text.function = identity, # add backslashes to special characters
        sanitize.colnames.function = \(x) str_c("\\textbf{", x, "}"),
        add.to.row = list(pos = list(nrow(.)), 
                          command = paste(
                              "\\hline\n\\multicolumn{6}{l}{\\footnotesize *p$<$0.05; **p$<$0.01; ***p$<$0.001. Models include library-spillover strata FEs.} \\\\\n")),
        hline.after = c(-1,0), # add horizontal lane after second to last column
        comment = FALSE,
        table.placement = getOption("xtable.table.placement", "h!"),
        file = paste0("output/tables/tab_attrition_int", ".tex"))

# END of 06_attrition_endline.R ----
